
library(dplyr)
library(openxlsx)

#######################################################
## Norway 
#######################################################

NES_no <- read_excel("NES_covariances_Norway.xlsx")
d <- read_excel("Main dataset for Norway.xlsx")

d$var_income <- NA
d$var_gender <- NA

for(i in 1:nrow(d)){
  drow <- d[i,]
  # INCOME/SUPPORT VARIANCES AND COVARIANCE
  d1 <- drow %>% dplyr::select(starts_with("S_HINC"))
  d1 <- as.data.frame(t(d1))
  d1$prop <- d1$V1/sum(d1$V1, na.rm=T)
  d1$cumu <- cumsum(d1$prop)
  d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
  d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
  d1$income <- d1$Var1
  d1 <- add_rownames(d1, "income")
  d1$income <- row.names(d1)
  names(d1)[names(d1)=="V1"] <- "n"
  d1 <- dplyr::select(d1, income, score, n)
  d2 <- drow %>% dplyr::select(starts_with("pcFAV_INC"))
  d2 <- as.data.frame(t(d2)) 
  d2$income <- 1:nrow(d2) 
  names(d2)[names(d2)=="V1"] <- "sup"
  d3 <- merge(d1,d2, by="income")
  d3<-na.omit(d3)
  d4<- d3 %>% dplyr::select(score,sup)
  sup_inc_cov<-cov.wt(d4,wt=d3$n)$cov[1,2]
  inc_var<-cov.wt(d4,wt=d3$n)$cov[1,1]
  inc_mean <- weighted.mean(d3$score,w=d3$n)
  
  # SEX/SUPPORT VARIANCES AND COVARIANCE
  d1 <- drow %>% dplyr::select(starts_with("S_SEX"))
  d1 <- as.data.frame(t(d1))
  d1$sex <- rownames(d1)
  d1$sex <- ifelse(d1$sex=="S_SEX_MALE",1,0)
  names(d1)[names(d1)=="V1"] <- "n"
  d1 <- dplyr::select(d1, sex, n)
  d2 <- drow %>% dplyr::select(starts_with("pcFAV_SEX"))
  d2 <- as.data.frame(t(d2)) 
  d2$sex <- rownames(d2)
  d2$sex <- ifelse(d2$sex=="pcFAV_SEX_MEN",1,0)
  names(d2)[names(d2)=="V1"] <- "sup"
  d3 <- merge(d1,d2, by="sex")
  cor(d3$sup,d3$sex,use="complete.obs")
  d3<-na.omit(d3)
  d3$prop <- d3$n/sum(d3$n, na.rm=T)
  d4<- d3 %>% dplyr::select(sex,sup)
  sup_sex_cov<-cov.wt(d4,wt=d3$n,method="ML")$cov[1,2]
  sex_var<-cov.wt(d4,wt=d3$n,method="ML")$cov[1,1]
  sex_mean <- weighted.mean(d3$sex,w=d3$n)

  d$var_income[i] <- inc_var
  d$var_gender[i] <- sex_var
}

no_data_vars <- d %>% 
  group_by(ASKED_YEAR) %>%
  dplyr::summarise(var_inc=mean(var_income,na.rm=T),
                   var_sex=mean(var_gender,na.rm=T)) %>%
  dplyr::rename(year=ASKED_YEAR) 
no_data_vars$source <- "Opinion-Policy Dataset"
NES_no$source <- "National Election Surveys"
no_data_vars <- rbind.fill(NES_no,no_data_vars)
no_data_vars$country <- "Norway"

#######################################################
## USA 
#######################################################

NES_us <- read_excel("NES_covariances_USA.xlsx")

d <- read_sav("Main dataset for United States, from Gilens 2012.xlsx")

d <- d %>% dplyr::filter(!is.na(PRED90_SW)&MALE_FAV!=0)

######## MIDDLE STOP ##########
d$pcFAV_SEX_MEN <- d$MALE_FAV/(d$MALE_FAV+d$MALE_OPP)
d$pcFAV_SEX_WOMEN <- d$FEMALE_FAV/(d$FEMALE_FAV+d$FEMALE_OPP)
d$S_SEX_MEN <- d$MALE_FAV+d$MALE_OPP
d$S_SEX_FEMALE <- d$FEMALE_FAV+d$FEMALE_OPP

# Make percentage variables HOUSEHOLD income 
d$pcFAV_INC1 <- d$INC1_FAV/(d$INC1_FAV+d$INC1_OPP)
d$pcFAV_INC2 <- d$INC2_FAV/(d$INC2_FAV+d$INC2_OPP)
d$pcFAV_INC3 <- d$INC3_FAV/(d$INC3_FAV+d$INC3_OPP)
d$pcFAV_INC4 <- d$INC4_FAV/(d$INC4_FAV+d$INC4_OPP)
d$pcFAV_INC5 <- d$INC5_FAV/(d$INC5_FAV+d$INC5_OPP)
d$pcFAV_INC6 <- d$INC6_FAV/(d$INC6_FAV+d$INC6_OPP)
d$pcFAV_INC7 <- d$INC7_FAV/(d$INC7_FAV+d$INC7_OPP)
d$pcFAV_INC8 <- d$INC8_FAV/(d$INC8_FAV+d$INC8_OPP)
d$pcFAV_INC9 <- d$INC9_FAV/(d$INC9_FAV+d$INC9_OPP)
d$pcFAV_INC10 <- d$INC10_FAV/(d$INC10_FAV+d$INC10_OPP)
d$pcFAV_INC11 <- d$INC11_FAV/(d$INC11_FAV+d$INC11_OPP)

# Set N for each income group to N who responded
d$S_HINC1 <- (d$INC1_FAV+d$INC1_OPP)
d$S_HINC2 <- (d$INC2_FAV+d$INC2_OPP)
d$S_HINC3 <- (d$INC3_FAV+d$INC3_OPP)
d$S_HINC4 <- (d$INC4_FAV+d$INC4_OPP)
d$S_HINC5 <- (d$INC5_FAV+d$INC5_OPP)
d$S_HINC6 <- (d$INC6_FAV+d$INC6_OPP)
d$S_HINC7 <- (d$INC7_FAV+d$INC7_OPP)
d$S_HINC8 <- (d$INC8_FAV+d$INC8_OPP)
d$S_HINC9 <- (d$INC9_FAV+d$INC9_OPP)
d$S_HINC10 <- (d$INC10_FAV+d$INC10_OPP)
d$S_HINC11 <- (d$INC11_FAV+d$INC11_OPP)

# OTHER 
d$ASKED_YEAR <- d$YEAR
d$ORG_DATASET_N <- d$MALE_FAV+d$MALE_OPP+d$MALE_DK+d$FEMALE_FAV+d$FEMALE_OPP+d$FEMALE_DK

d$OVERALL_FAV <- d$MALE_FAV+d$FEMALE_FAV
d$OVERALL_OPP <- d$MALE_OPP+d$FEMALE_OPP

d$var_income <- NA
d$var_gender <- NA

for(i in 1:nrow(d)){
  drow <- d[i,]
  # INCOME/SUPPORT VARIANCES AND COVARIANCE
  d1 <- drow %>% dplyr::select(starts_with("S_HINC"))
  d1 <- as.data.frame(t(d1))
  d1$prop <- d1$V1/sum(d1$V1, na.rm=T)
  d1$cumu <- cumsum(d1$prop)
  d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
  d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
  d1$income <- d1$Var1
  d1 <- add_rownames(d1, "income")
  d1$income <- row.names(d1)
  names(d1)[names(d1)=="V1"] <- "n"
  d1 <- dplyr::select(d1, income, score, n)
  d2 <- drow %>% dplyr::select(starts_with("pcFAV_INC"))
  d2 <- as.data.frame(t(d2)) 
  d2$income <- 1:nrow(d2) 
  names(d2)[names(d2)=="V1"] <- "sup"
  d3 <- merge(d1,d2, by="income")
  d3<-na.omit(d3)
  d4<- d3 %>% dplyr::select(score,sup)
  sup_inc_cov<-cov.wt(d4,wt=d3$n)$cov[1,2]
  inc_var<-cov.wt(d4,wt=d3$n)$cov[1,1]
  inc_mean <- weighted.mean(d3$score,w=d3$n)
  
  # SEX/SUPPORT VARIANCES AND COVARIANCE
  d1 <- drow %>% dplyr::select(starts_with("S_SEX"))
  d1 <- as.data.frame(t(d1))
  d1$sex <- rownames(d1)
  d1$sex <- ifelse(d1$sex=="S_SEX_MEN",1,0)
  names(d1)[names(d1)=="V1"] <- "n"
  d1 <- dplyr::select(d1, sex, n)
  d2 <- drow %>% dplyr::select(starts_with("pcFAV_SEX"))
  d2 <- as.data.frame(t(d2)) 
  d2$sex <- rownames(d2)
  d2$sex <- ifelse(d2$sex=="pcFAV_SEX_MEN",1,0)
  names(d2)[names(d2)=="V1"] <- "sup"
  d3 <- merge(d1,d2, by="sex")
  cor(d3$sup,d3$sex,use="complete.obs")
  d3<-na.omit(d3)
  d3$prop <- d3$n/sum(d3$n, na.rm=T)
  d4<- d3 %>% dplyr::select(sex,sup)
  sup_sex_cov<-cov.wt(d4,wt=d3$n,method="ML")$cov[1,2]
  sex_var<-cov.wt(d4,wt=d3$n,method="ML")$cov[1,1] # NB: Variance for "sup" NOT CORRECT (Calculated with correct weights below)
  sex_mean <- weighted.mean(d3$sex,w=d3$n)

  d$var_income[i] <- inc_var
  d$var_gender[i] <- sex_var
}
us_data_vars <- d %>% 
  group_by(ASKED_YEAR) %>%
  dplyr::summarise(var_inc=mean(var_income,na.rm=T),
                   var_sex=mean(var_gender,na.rm=T)) %>%
  dplyr::rename(year=ASKED_YEAR) 
us_data_vars$source <- "Opinion-Policy Dataset"
NES_us$source <- "National Election Surveys"
us_data_vars <- rbind.fill(NES_us,us_data_vars)
us_data_vars$country <- "United States"

#######################################################
## Combine Norway and US data
#######################################################

data_vars <- rbind(no_data_vars,us_data_vars)

write.xlsx(data_vars, "Variance comparison data.xlsx")


